knitr::opts_chunk$set(echo = T, root.dir = here::here())
# knitr::opts_knit$set(echo = T, root.dir = here::here())
library(dplyr)
library(ggplot2)
library(googledrive)
library(patchwork)
set.seed(2019)Merge all cell-type and tissue enrichment datasets for meta-analysis.
# Table 1
googledrive::drive_download("https://docs.google.com/spreadsheets/d/18bKV3mUuyIv1yPN7gps9HPmhROb4jz0x7KxtA8mf0iM/edit#gid=743954199", path = here::here("data/metaanalysis/TableS1.xlsx"), overwrite = T)## ! Using an auto-discovered, cached token.
## To suppress this message, modify your code or options to clearly consent to
## the use of a cached token.
## See gargle's "Non-interactive auth" vignette for more details:
## <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googledrive package is using a cached token for
## 'brian_schilder@alumni.brown.edu'.
## File downloaded:
## • 'TableS1' <id: 18bKV3mUuyIv1yPN7gps9HPmhROb4jz0x7KxtA8mf0iM>
## Saved locally as:
## • '/Desktop/PD_omics_review/data/metaanalysis/TableS1.xlsx'
# Table 2
googledrive::drive_download("https://docs.google.com/spreadsheets/d/19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0/edit#gid=314686952", path = here::here("data/metaanalysis/TableS2.xlsx"), overwrite = T)## File downloaded:
## • 'TableS2' <id: 19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0>
## Saved locally as:
## • '/Desktop/PD_omics_review/data/metaanalysis/TableS2.xlsx'
sheets <- readxl::excel_sheets(here::here("data/metaanalysis/TableS1.xlsx")) celltype_sheets <- sheets[endsWith(sheets,"celltypes")]
celltype_data <- lapply(celltype_sheets, function(sheet){
print(sheet)
dat <- readxl::read_excel(here::here("data/metaanalysis/TableS1.xlsx"), sheet = sheet)
if(sheet=="Nalls2019_celltypes") dat <- subset(dat, Source!="step1_2_summary.txt" ) # & step3==1
dat$Sheet <- gsub("_celltypes","",sheet)
cols <- c("Cell_Type","Sig","Reference","Source","Dataset")
cols <- cols[cols %in% colnames(dat)]
return(dplyr::relocate(dat, all_of(cols), .before = 1))
}) %>% `names<-`(celltype_sheets) %>%
data.table::rbindlist(fill = T)## [1] "Bryois2020_celltypes"
## [1] "Reynolds2019_celltypes"
## [1] "Corces2020_celltypes"
## [1] "Schilder2020_celltypes"
## [1] "Li2021_celltypes"
## [1] "Nalls2019_celltypes"
## New names:
## * `` -> ...17
## [1] "Agarwal2020_celltypes"
## [1] "Nott2019_celltypes"
## [1] "Coetzee2016_celltypes"
## [1] "Li2019_celltypes"
plot_fdr <- function(dat,
x="Score",
y="-log10(FDR)",
color="Sheet",
size=y,
label=NULL,
show_plot=FALSE){
gg <- ggplot(dat,
aes_string(x=x, y=y, color=color, size=size,
label=label)) +
geom_point(alpha=.5) +
theme_bw()
if(show_plot) print(gg)
return(gg)
}plot_celltypes <- celltype_data %>%
subset(!is.na(Sig) & !is.na(Cell_Type) & !( Cell_Type %in% c("brain"))) %>%
dplyr::mutate( Sig_neglog = -log1p(Sig)) %>%
dplyr::group_by(Reference, .drop = F) %>%
dplyr::mutate(Cell_Type= paste0(Cell_Type,"@",group_indices())) %>%
## Calculate zscore for fine-mapping overlap
dplyr::summarise(Cell_Type,Source,Sheet,Dataset,Sig,Count,
FDR = p.adjust(p = Sig, method = "fdr"),
bonf = p.adjust(p = Sig, method = "bonf"),
n_tests = n()) %>%
dplyr::mutate(Score = ifelse(Reference=="Schilder et al, 2020 (bioRxiv)",
as.numeric(cut(Count, 10))/10,
scales::rescale(-log10(FDR), c(0.0000000001,1))) ) %>%
dplyr::ungroup() %>%
dplyr::mutate(FDR_all = p.adjust(p = Sig, method = "fdr")) %>%
subset(FDR_all<.05 & FDR<.05 & Sig < 0.05 & (!is.infinite(Score))) ## Warning: `group_indices()` was deprecated in dplyr 1.0.0.
## Please use `cur_group_id()` instead.
## `summarise()` has grouped output by 'Reference'. You can override using the `.groups` argument.
# Sort by medians
plot_celltypes <- plot_celltypes %>%
dplyr::group_by(Reference, Cell_Type) %>%
dplyr::summarise(mean_Score = mean(Score, na.rm=T),
median_Score = median(Score, na.rm=T),
.groups = "keep") %>%
merge(plot_celltypes, by=c("Reference","Cell_Type")) %>%
dplyr::arrange(Reference, desc(median_Score), bonf)
plot_celltypes$Cell_Type <- factor(plot_celltypes$Cell_Type,
levels = rev(unique(plot_celltypes$Cell_Type)), ordered = T)
gg_FDR1 <- gg_FDR2 <- plot_fdr(dat = plot_celltypes, y="FDR", label = "Cell_Type")
plotly::ggplotly(gg_FDR1)gg_FDR2 <- plot_fdr(dat = plot_celltypes, label = "Cell_Type")
plotly::ggplotly(gg_FDR2)celltype_counts <- plot_celltypes %>% dplyr::group_by(Reference) %>%
dplyr::summarise(n_celltypes=dplyr::n_distinct(Cell_Type)) %>%
dplyr::arrange(desc(n_celltypes))
knitr::kable(celltype_counts)| Reference | n_celltypes |
|---|---|
| Nalls et al, 2019 (Lancet Neurology) | 116 |
| Schilder et al, 2020 (bioRxiv) | 8 |
| Coetzee et al, 2016 (Scientific Reports) | 7 |
| Reynolds et al. 2019 (npj Parkinson’s Disease) | 4 |
| Bryois et al, 2020 (Nature Genetics) | 3 |
| Li et al, 2019 (Nature Communications) | 3 |
| Nott et al, 2019 (Science) | 3 |
| Agarwal et al, 2020 (Nature Communications) | 2 |
Summarise the top celltypes/tissues per study according to abstracted “Score” value (computed here).
### Limit the number of celltypes/study for visualization purposes
max_celltypes <- 25
celltype_plot <- ggplot(plot_celltypes %>%
dplyr::group_by(Reference) %>%
dplyr::slice_max(order_by = median_Score, n = max_celltypes),
aes(x=Score, y=Cell_Type, fill=Reference)) +
# geom_bar(show.legend = F, stat="identity") +
geom_boxplot(show.legend = F) +
# geom_violin(show.legend = F) +
geom_point(show.legend = F, alpha=.2) +
# Create a dicionary mapping the new labels
scale_y_discrete(labels = setNames(gsub("@.*","",levels(plot_celltypes$Cell_Type )),
levels(plot_celltypes$Cell_Type) ) ) +
# facet_wrap(facets = .~paste(gsub("[(]","\n(",Reference),"\n\n n tests =",n_tests),
# scales = "free", drop = F, nrow = 1) +
facet_grid(facets = paste0(gsub("[(]","\n(",Sheet),"\n (tests = ",n_tests,")")~.,
scales = "free", space="free", drop = F) +
theme_bw() +
theme(strip.background = element_rect(fill = "black"),
strip.text = element_text(color = "white", angle = 0),
strip.text.y = element_text(color = "white", angle = 0),
panel.grid.minor.x = element_blank())
print(celltype_plot)# ggsave(here::here("plots/metaanalysis/celltype_metaanalysis.pdf"),celltype_plot, dpi = 300, height = 15)tissue_sheets <- sheets[endsWith(sheets,"tissues")]
tissue_data <- lapply(tissue_sheets, function(sheet){
print(sheet)
dat <- readxl::read_excel(here::here("data/metaanalysis/TableS1.xlsx"), sheet = sheet)
dat$Sheet <- gsub("_tissues","",sheet)
cols <- c("Tissue","Sig","Reference","Source","Dataset")
cols <- cols[cols %in% colnames(dat)]
return(dplyr::relocate(dat, all_of(cols), .before = 1))
}) %>% `names<-`(tissue_sheets) %>%
data.table::rbindlist(fill = T)## [1] "Bryois2020_tissues"
## [1] "Reynolds2019_tissues"
## [1] "Corces2020_tissues"
## [1] "Schilder2020_tissues"
## [1] "Nalls2019_tissues"
## [1] "Agarwal2020_tissues"
## [1] "Nott2019_tissues"
## [1] "Coetzee2016_tissues"
## [1] "Li2019_tissues"
plot_tissues <- tissue_data %>%
subset(!(Tissue %in% c(NA,"NA")) & (!is.na(Sig))) %>%
dplyr::mutate( Sig_neglog = -log1p(Sig)) %>%
## Calculate zscore for fine-mapping overlap
dplyr::group_by(Reference, .drop = F) %>%
dplyr::mutate(Tissue= paste0(Tissue,"@",group_indices())) %>%
dplyr::summarise(Tissue,Source,Sheet,Dataset,Sig,PP.H4,
FDR = p.adjust(p = Sig, method = "fdr"),
bonf = p.adjust(p = Sig, method = "bonf"),
n_tests = n(),
.groups = "keep") %>%
# dplyr::ungroup() %>%
dplyr::mutate(Score = ifelse(Reference=="Schilder et al, 2020 (bioRxiv)",
as.numeric(cut(PP.H4, 10))/10,
scales::rescale(-log(FDR), c(0.0000000001,1))) ) %>%
dplyr::ungroup() %>%
dplyr::mutate(FDR_all = p.adjust(p = Sig, method = "fdr")) %>%
subset((FDR_all<.05 & FDR<.05 & Sig < 0.05) | (PP.H4>.8))
# Sort by medians
plot_tissues <- plot_tissues %>%
dplyr::group_by(Reference, Tissue, .drop = F) %>%
dplyr::summarise(mean_Score = mean(Score, na.rm=T),
median_Score = median(Score, na.rm=T),
n_tissues = n_distinct(Tissue),
.groups = "keep") %>%
merge(plot_tissues, by=c("Reference","Tissue"), all.y = T) %>%
dplyr::arrange(Reference, desc(median_Score), bonf)
plot_tissues$Tissue <- factor(plot_tissues$Tissue,
levels = rev(unique(plot_tissues$Tissue)), ordered = T)
gg_FDR1 <- plot_fdr(dat = plot_tissues, y="FDR", label = "Tissue")
plotly::ggplotly(gg_FDR1)gg_FDR2 <- plot_fdr(dat = plot_tissues, label = "Tissue")
plotly::ggplotly(gg_FDR2) plot_tissues %>% dplyr::group_by(Reference) %>%
dplyr::summarise(n_tissues=dplyr::n_distinct(Tissue)) %>%
dplyr::arrange(desc(n_tissues))## # A tibble: 7 × 2
## Reference n_tissues
## <chr> <int>
## 1 Reynolds et al. 2019 (npj Parkinson's Disease) 29
## 2 Nalls et al, 2019 (Lancet Neurology) 15
## 3 Coetzee et al, 2016 (Scientific Reports) 6
## 4 Bryois et al, 2020 (Nature Genetics) 2
## 5 Agarwal et al, 2020 (Nature Communications) 1
## 6 Li et al, 2019 (Nature Communications) 1
## 7 Nott et al, 2019 (Science) 1
# plot_tissues[is.na(plot_tissues$Dataset),"Dataset"] <- plot_tissues[is.na(plot_tissues$Dataset),"Sheet"]
tissue_plot <- ggplot(plot_tissues,
aes(x=Score, y=Tissue, fill=Reference)) +
# geom_bar(show.legend = F, stat="identity") +
geom_boxplot(show.legend = F) +
# geom_violin(show.legend = F) +
geom_point(show.legend = F, alpha=.2) +
# facet_wrap(facets = .~paste(gsub("[(]","\n(",Reference),"\n\n n tests =",n_tests),
# scales = "free", drop = F, nrow = 1) +
facet_grid(facets = paste0(gsub("[(]","\n(",Sheet),"\n (tests = ",n_tests,")")~.,
scales = "free", space="free", drop = F) +
scale_x_continuous(limits = c(0,1)) +
# Create a dicionary mapping the new labels
scale_y_discrete(labels = setNames(gsub("@.*","",levels(plot_tissues$Tissue)),
levels(plot_tissues$Tissue) ) ) +
theme_bw() +
theme(strip.background = element_rect(fill = "black"),
strip.text = element_text(color = "white", angle = 0),
strip.text.y = element_text(color = "white", angle = 0),
panel.grid.minor.x = element_blank())
print(tissue_plot)# ggsave("plots/metaanalysis/tissue_metaanalysis.pdf",tissue_plot, dpi = 300, height = 15)fused_plot <- (celltype_plot + labs(title="Cell-types") +
tissue_plot + labs(title="Tissues")) +
patchwork::plot_annotation(tag_levels = letters)
print(fused_plot)## Warning: Removed 27 rows containing non-finite values (stat_boxplot).
# ggsave(here::here("plots/metaanalysis/metaanalysis.pdf"),fused_plot, height = 14, width = 15, dpi = 300)
# ggsave(here::here("plots/metaanalysis/metaanalysis.jpg"),fused_plot, height = 14, width = 15, dpi = 300)utils::sessionInfo()## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.1 googledrive_2.0.0 ggplot2_3.3.5 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.1 xfun_0.25 bslib_0.2.5.1 purrr_0.3.4
## [5] gargle_1.2.0 colorspace_2.0-2 vctrs_0.3.8 generics_0.1.0
## [9] viridisLite_0.4.0 htmltools_0.5.1.1 yaml_2.2.1 utf8_1.2.2
## [13] plotly_4.9.4.9000 rlang_0.4.11 jquerylib_0.1.4 pillar_1.6.2
## [17] glue_1.4.2 withr_2.4.2 DBI_1.1.1 rappdirs_0.3.3
## [21] readxl_1.3.1 lifecycle_1.0.0 stringr_1.4.0 munsell_0.5.0
## [25] gtable_0.3.0 cellranger_1.1.0 htmlwidgets_1.5.3 evaluate_0.14
## [29] labeling_0.4.2 knitr_1.33 crosstalk_1.1.1 curl_4.3.2
## [33] fansi_0.5.0 highr_0.9 Rcpp_1.0.7 openssl_1.4.4
## [37] scales_1.1.1 jsonlite_1.7.2 farver_2.1.0 fs_1.5.0
## [41] askpass_1.1 digest_0.6.27 stringi_1.7.3 grid_4.1.0
## [45] rprojroot_2.0.2 here_1.0.1 cli_3.0.1 tools_4.1.0
## [49] magrittr_2.0.1 sass_0.4.0 lazyeval_0.2.2 tibble_3.1.3
## [53] tidyr_1.1.3 crayon_1.4.1 pkgconfig_2.0.3 ellipsis_0.3.2
## [57] data.table_1.14.0 assertthat_0.2.1 rmarkdown_2.10 httr_1.4.2
## [61] rstudioapi_0.13 R6_2.5.0 compiler_4.1.0